R/scraper functions/gov_search.R

Defines functions gov_search gov_search_json_get

Documented in gov_search gov_search_json_get

#' Query the GOV.UK search API and parse the JSON response
#'
#' Builds a request to `https://www.gov.uk/api/search.json` from the supplied
#' query parameters, performs a GET request and parses the JSON body.
#'
#' @param q A named list of query parameters, passed as the `query` argument
#'   of `modify_url()`.
#' @return The parsed response as produced by `jsonlite::fromJSON()`.
#'   `gov_search()` in this file reads its `total` and `results` elements.
#' @details Stops with an error if the request returns an HTTP error status
#'   or if the response content type is not `application/json`.
gov_search_json_get <- function(q) {
  # Preparing the URL
  url <- modify_url("https://www.gov.uk", path = "api/search.json", query = q)

  # API request
  response <- GET(url)

  # Tracking errors: carry the status code in the error itself so it is not
  # lost when the error is caught or logged (it used to go to stdout only).
  if (http_error(response)) {
    stop(
      "Something went wrong. (HTTP status ", status_code(response), ")",
      call. = FALSE
    )
  }

  if (http_type(response) != "application/json") {
    stop("API did not return json", call. = FALSE)
  }

  # Extracting content; the encoding is given explicitly so httr does not
  # have to guess it.
  json_text <- content(response, "text", encoding = "UTF-8")

  # Converting the JSON text into R objects
  jsonlite::fromJSON(json_text)
}


# https://www.gov.uk/api/search.json?q=taxes
# Gov.uk search api
#' Search GOV.UK transparency-data publications and return all results
#'
#' Runs a GOV.UK search restricted to `filter_format = "publication"` and
#' `filter_detailed_format = "transparency-data"`, requesting the `title`,
#' `organisations`, `link` and `public_timestamp` fields. It first asks for
#' the total number of hits, then fetches the results in pages of 1000 and
#' row-binds them.
#'
#' @param search_term Search string sent as the `q` parameter. The default
#'   includes literal double quotes around the phrase.
#' @return The pages' `results` combined with `bind_rows()`. When the search
#'   has no hits no page requests are made and the result is empty.
#' @details Stops with an error when the reported total is 10000 or more.
gov_search <- function(search_term='"workforce management information"') {
  # Results requested per page (see the count step below).
  page_size <- 1000

  query_list <- list(
    q = search_term, # search term to use
    # filters on document type
    filter_format = "publication",
    filter_detailed_format = "transparency-data",
    # fields to return (the name is deliberately repeated, once per field)
    fields = "title",
    fields = "organisations",
    fields = "link",
    fields = "public_timestamp"
  )

  # Count results first (count = 0 returns no rows) as max return is 1000
  search_n <- gov_search_json_get(q = append(query_list, list(count = 0)))$total
  if (search_n >= 10000) {
    stop("More than 10 pages of results, refine search")
  }

  # Start offsets of each page: 0, 1000, 2000, ...
  # seq_len() yields an empty vector for zero hits, whereas 1:0 would give
  # c(1, 0) and trigger requests with start = 0 and start = -1000.
  result_starts <- (seq_len(ceiling(search_n / page_size)) - 1) * page_size

  # Get all pages and combine
  map(
    result_starts,
    ~ gov_search_json_get(
      q = append(query_list, list(start = ., count = page_size))
    )$results
  ) %>%
    bind_rows()
}

# a <- gov_search('"workforce management information"')
co-analysis/mwmi.govuk.scraper documentation built on Sept. 13, 2023, 7:28 a.m.